rm(list = ls())
options(stringsAsFactors = FALSE)
seed_to_use <- 216
set.seed(seed_to_use)
library(data.table)
library(ggplot2)
library(countrycode)
library(haven)
setwd("C:/Users/kurtz.61/Dropbox/marcus_oil/replication_code/programs")

oil_csv_to_load <- list.files("C:/Users/kurtz.61/Dropbox/marcus_oil/replication_code/programs/oil_prices", full.names = TRUE)
oil_grav <- list.files("C:/Users/kurtz.61/Dropbox/marcus_oil/replication_code/programs/oil_prices", full.names = FALSE)
unlist(strsplit(oil_grav[1], split = "Gravity_"))[2]

load_oil_csvs <- function(i){
  oil_csv <- fread(oil_csv_to_load[i], skip = 4L)
  oil_csv[, grav := unlist(strsplit(oil_grav[i], split = "Gravity_"))[2] ]
  oil_csv
}

# cutpoints for oil values
oil_gravs_list <- lapply(1:length(oil_csv_to_load), load_oil_csvs)
oil_gravs <- rbindlist(oil_gravs_list)
unique(oil_gravs$grav)
oil_gravs[, min_grav := ifelse(grav == "20.0_degrees_or_less_Annual.csv", 0, 
  ifelse(grav == "20.1_to_25.0_degrees_Annual.csv", 20.1, 
    ifelse(grav == "25.1_to_30.0_degrees_Annual.csv", 25.1, 
      ifelse(grav == "30.1_to_35.0_degrees_Annual.csv", 30.1, 
        ifelse(grav == "35.1_to_40.0_degrees_Annual.csv", 35.1, 
          ifelse(grav == "40.1_to_45.0_degrees_Annual.csv", 40.1, 
            ifelse(grav == "45.1_degrees_or_more_Annual.csv",45.1,  NA)))))))]


oil_gravs[,max_grav := ifelse(grav == "20.0_degrees_or_less_Annual.csv", 20.0, 
  ifelse(grav == "20.1_to_25.0_degrees_Annual.csv", 25.0, 
    ifelse(grav == "25.1_to_30.0_degrees_Annual.csv", 30.0, 
      ifelse(grav == "30.1_to_35.0_degrees_Annual.csv", 35.0, 
        ifelse(grav == "35.1_to_40.0_degrees_Annual.csv", 40.0, 
          ifelse(grav == "40.1_to_45.0_degrees_Annual.csv", 45.0,     
            ifelse(grav == "45.1_degrees_or_more_Annual.csv", 1000, NA)))))))]

setnames(oil_gravs, c("years", "dollars_per_barrel", "grav", "min_grav", "max_grav"))
load("C:/Users/kurtz.61/Dropbox/marcus_oil/replication_code/programs/oil_data_32_imputed.Rda")
#as.data.frame("C:/Users/kurtz.61/Dropbox/marcus_oil/data_for_analysis/working dataset after 01.29.21/final imputation runs/oil_data_32_imputed.rdata")
#oildata<oil_data_32_imputed
data.table(oil_data_32_imputed)

gravs <- unique(oil_gravs$grav)
oil_data_32_imputed$years <- as.integer(as.character(oil_data_32_imputed$years))
#factor_data_oil_32$country <- as.character(factor_data_oil_32$country)
#oildata$country <- as.character(oildata$country)
w <- oil_data_32_imputed[ oil_data_32_imputed$country == "abu dhabi", ]


 
 make_oil_prices <- function(i, dt_to_use = oil_data_32_imputed){

     grav_to_use <- gravs[i]
     grav_val <- oil_gravs[ grav == grav_to_use,]
     grav_val$grav <- NULL
     min_g <- unique(grav_val$min_grav)
     max_g <- unique(grav_val$max_grav)
     grav_dt <- dt_to_use[ dt_to_use$avgapigravity >= min_g & dt_to_use$avgapigravity <= max_g, ]
     oil_prices <- merge(grav_dt, grav_val, by = "years")

}

price_list <- lapply(1:length(gravs), make_oil_prices)
oil_prices <- rbindlist(price_list)


# using january 2010 dollar, account for inflation in prices
# basically, what is value of dollar in jan of that year vs. jan 2010
# bls calculator here: https://data.bls.gov/cgi-bin/cpicalc.pl
yrs <- 1980:2012
dol <- c(2.79, 2.49, 2.30, 2.22, 2.13, 2.05, 1.98, 1.95, 1.87, 1.79, 1.70, 1.61, 1.57, 1.52, 1.48, 1.44, 1.40, 1.36, 1.34, 1.32, 1.28, 1.24, 1.22, 1.19, 1.17, 1.14, 1.09, 1.07, 1.03, 1.03, 1.00, 0.98, 0.96)
infl_index <- data.table(years = yrs, infl_idx = dol)
oil_prices <- merge(oil_prices, infl_index, by = "years", all.x = TRUE)
oil_prices[, dollars_per_barrel_adj := dollars_per_barrel * infl_idx]
oil_prices[, dollars_by_field := dollars_per_barrel_adj * oil]

save(oil_prices, file = "C:/Users/kurtz.61/Dropbox/marcus_oil/replication_code/programs/oil_prices.rdata")
write_dta(oil_prices, path = "C:/Users/kurtz.61/Dropbox/marcus_oil/replication_code/programs/oil_prices.dta")


#######stop here#######

###### some basic diagnostics
load("~/data/code_for_marcus/oil_project/oil_melt2.rdata")
saudi_oil <- oil_melt33[ country == "saudi arabia" ]
max_val <- max(saudi_oil$oil, na.rm = TRUE)
unrealistic_oil_vals <- oil_melt32[ oil > max_val*.5]
unrealistic_oil_vals <- unrealistic_oil_vals[ country != "saudi arabia"]
oil_prices <- oil_prices[ !(unique_id %in% unrealistic_oil_vals$unique_id & years %in% unrealistic_oil_vals$years) ]

oil_prices$country <- ifelse(oil_prices$country == "argenti", "argentina", 
  oil_prices$country)
oil_prices$country <- ifelse(oil_prices$country == "vietm", "vietnam", 
  oil_prices$country)
oil_prices$country <- ifelse(oil_prices$country %in% c("abu dhabi","dubai","ras al khaimah","sharjah"), 
  "united arab emirates", oil_prices$country)
oil_prices$country <- ifelse(oil_prices$country == "gha", 
  "ghana", oil_prices$country)
oil_prices$country <- ifelse(oil_prices$country == "surime", 
  "suriname", oil_prices$country)
oil_prices$country <- ifelse(oil_prices$country == "cada", 
  "canada", oil_prices$country)

country_year_output_price <- oil_prices[, sum(dollars_by_field), by = .(country, years)]

country_year_output_price[, wb_code := countrycode(sourcevar = country, origin = "country.name", destination = "wb_api3c")]



save(country_year_output_price, file = "~/data/code_for_marcus/oil_project/country_year_output_price.rdata")


ggplot(data = country_year_output_price[ country == "saudi arabia"], aes(x = V1, y = years)) + geom_line()
